#!/usr/bin/python
#coding: utf-8

import sys
sys.path.append("..")

from MySql_InterFace.PageMySql import AllPageMySql
from Logging.UrlLogging import log_Main

class EveryPage(object):
    """Extract app-detail links from one listing page and store them in MySQL.

    Collected ``(absolute_url, link_text)`` pairs accumulate in
    ``self.AllUrl`` across calls; ``InsertUrl`` writes the whole set to
    the database each time ``getAllUrl`` finishes a page.
    """

    def __init__(self):
        # Set of (url, text) tuples; a set deduplicates links that
        # appear more than once on a page.
        self.AllUrl = set([])

    def getAllUrl(self, html):
        """Parse *html*, collect app links, then insert them into MySQL.

        :param html: full HTML text of one appchina.com listing page.
        """
        # Imports kept method-local (matching the file's style) but
        # hoisted out of the loop below — the original re-ran the
        # import statements on every matched element.
        import lxml.html
        import urlparse  # Python 2 stdlib; urllib.parse on Python 3

        tree = lxml.html.fromstring(html)
        for link in tree.cssselect("div.main > div#left > ul.app-list > li > div.app-info > h1 > a"):
            # Resolve a possibly-relative href against the site root.
            url = urlparse.urljoin("http://www.appchina.com", link.get("href"))
            # Per-link logging deliberately disabled: in production the
            # per-URL log entries exceeded the maximum storable log size.
            self.AllUrl.add((url, link.text_content()))
        # Persist everything collected so far.
        # NOTE(review): AllUrl is never cleared, so a second call
        # re-inserts earlier pages' links — confirm the DB layer
        # deduplicates (e.g. INSERT IGNORE / unique key on url).
        self.InsertUrl()

    def InsertUrl(self):
        """Write the accumulated (url, text) pairs into MySQL."""
        db = AllPageMySql()
        db.Create()  # ensure the target table exists before inserting
        db.Insert(self.AllUrl)
        db.Close()